"""
FGW numbers are from the lower-right panel of Table 3 in Foote, Gerardi, & Willen (2008), abbreviated "FGW" throughout.
"""

import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load seaborn's default theme and use the "whitegrid" axes style for every
# figure produced by this script.
sns.set(style="whitegrid")

# Location of the input data.  NOTE: despite the "_dir" name, this string is
# handed unchanged to the CSV reader further down, so it has to be the full
# path of the CSV file itself rather than just the folder containing it.
data_dir = ''  # TODO: fill in before running

# Prefix that is glued (by plain string concatenation) in front of every
# figure file name, so it should end with a path separator.  The empty string
# writes the figures into the current working directory.
figure_dir = ''  # TODO: fill in before running



def FGW_func(LTV_interp):
    """
    Foreclosure hazard from Foote, Gerardi, & Willen (2008), evaluated at the
    loan-to-value ratio(s) ``LTV_interp`` (in percent).

    The hazard is linearly interpolated between the tabulated points, which
    span LTV = 40 to LTV = 200.  Outside that range np.interp returns the
    value at the nearest end point.  A scalar or an array-like may be passed;
    the result has the matching shape.
    """
    # FGW tabulate the hazard against equity as a percent of the original
    # mortgage, from -50 up to 150 in steps of 25 (nine points).
    equity_pct = np.arange(-50,175,25)
    hazard_at_equity = np.array([7.,5.4,3.4,1.,.5,.2,.1,.01,0.])

    # LTV is the more natural argument, so convert.  Normalising the original
    # mortgage to 100 makes the property value equal to equity + 100, and the
    # LTV (in percent) equal to 100 * 100 / property value.
    property_value = equity_pct + 100.
    ltv_at_equity = 100.* (100./property_value)

    # Higher equity means lower LTV, so the LTV nodes come out in decreasing
    # order.  np.interp needs increasing x-coordinates: reverse both arrays.
    ltv_increasing = ltv_at_equity[::-1]
    hazard_by_ltv = hazard_at_equity[::-1]
    return np.interp(LTV_interp, ltv_increasing, hazard_by_ltv)


# Evaluate the FGW hazard on a one-point LTV grid running from 40 to 200.
# Those bounds are the range covered by the FGW table: the largest "equity as
# a percent of original loan balance" they report is 150, which for a 100k
# loan means a 250k property, i.e. an LTV of 100*(100/250) = 40; the smallest
# is -50, i.e. a 50k property and an LTV of 100*(100/50) = 200.
LTV_list = list(np.arange(40.,201.,1.))
foreclosure_hazard_list = [FGW_func(ltv) for ltv in LTV_list]


##############################################################################
#### Create the FGW foreclosure hazard pic
hazard_axes = sns.lineplot(x = LTV_list,y = foreclosure_hazard_list)

# No title is drawn; the hazard is expressed relative to LTV = 80.
hazard_axes.set_xlabel('LTV')
hazard_axes.set_ylabel('Relative Foreclosure Hazard Rate')

# Write the figure to disk first, then display it and release it.
plt.savefig(figure_dir + 'FGW.pdf',dpi=300.)
plt.show()
plt.close()



##############################################################################
#### Create the LTV distribution plot

MtM_dist = pd.read_csv(data_dir)

# Column holding the weights for quarter 72, i.e. 12/31/2015
quarter_str = '72'
weight_str = 'relative_weight_q' + quarter_str
weights_q = MtM_dist[weight_str]

# Re-express every weight relative to the entry labelled 120.
# NOTE(review): presumably row label 120 is the LTV = 120 bucket, i.e. the row
# labels coincide with 'LTV_bucket' -- confirm against the CSV.
dist_relative_to_120 = weights_q / weights_q[120]

# Only the first 151 rows are drawn.
dist_axes = sns.lineplot(x = MtM_dist['LTV_bucket'][:151],y = weights_q[:151])
dist_axes.set_xlabel('LTV')
dist_axes.set_ylabel('Relative Frequency')

plt.savefig(figure_dir + 'MtM_cLTV_Dist_Beg2016.pdf',dpi=300.)
plt.show()
plt.close()

# Weight of the entry labelled 80, measured against the entry labelled 120.
print('Relative frequency at 80 is ' + str((dist_relative_to_120[80])))

##############################################################################
#### Create the CMF and CDF for q72, 12/31/2015

MtM_dist = pd.read_csv(data_dir)

quarter_str = '72'
weight_str = 'relative_weight_q' + quarter_str

# Implied foreclosure mass at each LTV from 40 to 200: the FGW hazard at that
# LTV times the weight stored under row label LTV.  The grid starts at 40 in
# steps of 1, so entry i of LTV_list / mass_list / CMF_list belongs to
# LTV = 40 + i (entry 40 <-> LTV 80, entry 50 <-> LTV 90, entry 80 <-> LTV 120).
# NOTE(review): assumes row label k of the CSV is the LTV = k bucket -- confirm.
LTV_list = list(np.arange(40.,201.,1.))
mass_list = [FGW_func(ll) * MtM_dist[weight_str][ii + 40]
             for ii,ll in enumerate(LTV_list)]

sns.lineplot(x = LTV_list,y=mass_list)
plt.xlabel('LTV')
plt.ylabel('Implied Relative Frequency')
plt.savefig(figure_dir + 'MtM_Dist_Foreclosures_Beg2016.pdf')
plt.show()
plt.close()

# Implied foreclosure mass at LTV 80 relative to LTV 120.  Written as an
# explicit ratio so that it does not silently rely on the mass at LTV 80
# being exactly 1; when it is exactly 1 this gives the same number as
# 1./mass_list[80].
FGW_foreclosures_at_80_vs_120 = mass_list[40] / mass_list[80]
print('Relative frequency at 80 is ' + str((FGW_foreclosures_at_80_vs_120)))

# Cumulative share of the implied foreclosure mass up to and including each LTV
CMF_list = np.cumsum(mass_list) / np.sum(mass_list)

# Share of the mass above LTV 90, as a whole-number percent.  Rounding is done
# after scaling to percent so that no floating-point residue
# (e.g. 56.99999999999999) is printed.
print('% foreclosures EUW, from FGW ' + str(round(100*(1-CMF_list[50]),0)))

sns.lineplot(x = LTV_list,y=CMF_list)
plt.xlabel('LTV')
plt.ylabel('Implied CDF')
plt.savefig(figure_dir + 'CMF_Foreclosures_Beg2016.pdf',dpi=300.)
plt.show()
plt.close()

##############################################################################
## Create the plots over time

MtM_dist = pd.read_csv(data_dir)

# LTV grid from 40 to 300 in steps of 1, so entry i belongs to LTV = 40 + i.
# The FGW hazard depends only on the LTV, not on the quarter, so evaluate it
# once here instead of once per quarter.
LTV_list = list(np.arange(40.,301.,1.))
hazard_list = [FGW_func(ll) for ll in LTV_list]

CDF_80_list  = []
CDF_90_list  = []
CDF_100_list = []
qq_list = []

for qq in range(61,93):
    quarter_str = str(qq)
    weights_qq = MtM_dist['relative_weight_q' + quarter_str]

    # Implied foreclosure mass per LTV: hazard times the weight stored under
    # row label LTV.
    # NOTE(review): assumes row label k of the CSV is the LTV = k bucket.
    mass_list = [hazard * weights_qq[ii + 40]
                 for ii,hazard in enumerate(hazard_list)]

    # Cumulative share (in percent) of the mass up to and including each LTV;
    # entries 40, 50 and 60 correspond to LTV 80, 90 and 100.
    CMF_list = 100.*np.cumsum(mass_list) / np.sum(mass_list)
    CDF_80_list.append(CMF_list[40])
    CDF_90_list.append(CMF_list[50])
    CDF_100_list.append(CMF_list[60])

    # Create the list of quarters for the x-axis.  Quarter numbers are
    # consecutive with qq = 1 <-> 1998Q1 (so qq = 72 <-> 2015Q4).
    years_since_1998, quarter_index = divmod(qq - 1, 4)
    year    = 1998 + years_since_1998
    quarter = quarter_index + 1
    qq_list.append(str(year)+'Q'+str(quarter))

data = pd.DataFrame({'CDF80': CDF_80_list,'CDF90': CDF_90_list,'CDF100': CDF_100_list},
                    index = qq_list)

sns.lineplot(data = data)
plt.legend(labels=['LTV<=80','LTV<=90','LTV<=100'])
plt.ylabel('Percent')
# Label only every sixth quarter to keep the x-axis readable
plt.xticks((0,6,12,18,24,30))
plt.savefig(figure_dir + 'AW_foreclosures_over_time.pdf',dpi=300.)
plt.show()
plt.close()

